import pandas as pd
import matplotlib.pylab as plt
%matplotlib inline
plt.style.use(['bmh']) # bmh Styling used for Visulization
df = pd.read_excel("flight_attendant.xlsx")
print ('Data read into a pandas dataframe!')
Data read into a pandas dataframe!
df.head() #Calling first 5 rows
Attendant ID | Attendant Name | Rank | Contact Number | Carrier Code | Carrier Name | Country Carrier | Flight Number | Flight Date | Origin | Destination | Mileage | Pilot ID | Pilot Name | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 4567 | Daniel Brown | Senior | 012 5782 | BA | British Airways | United Kingdom | BA1234 | 2021-03-14 | London | Paris | 216 | 126 | Ian Stoke |
1 | 4567 | Daniel Brown | Senior | 012 5782 | BA | British Airways | United Kingdom | BA6753 | 2021-05-21 | London | Amsterdam | 209 | 672 | Geri Lawton |
2 | 4567 | Daniel Brown | Senior | 012 5782 | BA | British Airways | United Kingdom | BA1561 | 2021-08-08 | London | Brussels | 200 | 98 | Peter Clark |
3 | 4567 | Daniel Brown | Senior | 012 5782 | BA | British Airways | United Kingdom | BA9878 | 2021-10-25 | London | Bucharest | 1311 | 545 | David Roger |
4 | 1906 | Emma Austen | Junior | 013 8261 | BA | British Airways | United Kingdom | BA9123 | 2021-01-05 | London | Kyev | 1362 | 23 | Oscar Halloran |
df.dtypes #Checking Datatypes
Attendant ID int64 Attendant Name object Rank object Contact Number object Carrier Code object Carrier Name object Country Carrier object Flight Number object Flight Date datetime64[ns] Origin object Destination object Mileage int64 Pilot ID int64 Pilot Name object dtype: object
df.describe() #Understanding the data
Attendant ID | Mileage | Pilot ID | |
---|---|---|---|
count | 19.000000 | 19.000000 | 19.000000 |
mean | 3958.631579 | 2631.052632 | 397.315789 |
std | 3448.710664 | 4121.487656 | 330.787117 |
min | 213.000000 | 200.000000 | 17.000000 |
25% | 1343.500000 | 216.000000 | 98.000000 |
50% | 1906.000000 | 1014.000000 | 545.000000 |
75% | 6805.000000 | 2406.500000 | 672.000000 |
max | 9043.000000 | 17016.000000 | 888.000000 |
df.info(verbose=False) #Information about the dataframe
<class 'pandas.core.frame.DataFrame'> RangeIndex: 19 entries, 0 to 18 Columns: 14 entries, Attendant ID to Pilot Name dtypes: datetime64[ns](1), int64(3), object(10) memory usage: 2.2+ KB
To view the dimensions of the dataframe, we use the .shape parameter.
df.shape # size of dataframe (rows, columns)
(19, 14)
df1 = df.groupby(['Pilot Name'],as_index=False).max() #Using groupby function to create required pivot table
df1
Pilot Name | Attendant ID | Attendant Name | Rank | Contact Number | Carrier Code | Carrier Name | Country Carrier | Flight Number | Flight Date | Origin | Destination | Mileage | Pilot ID | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | David Roger | 9043 | James Biggs | Senior | 031 1241 | BA | British Airways | United Kingdom | BA9878 | 2021-12-30 | New York | Naples | 3451 | 545 |
1 | Geri Lawton | 9043 | John Garner | Senior | 031 1241 | BA | British Airways | United Kingdom | BA9123 | 2021-09-19 | London | London | 6593 | 672 |
2 | Ian Stoke | 4567 | Holly May | Senior | 013 8261 | BA | British Airways | United Kingdom | BA4545 | 2021-04-19 | London | Paris | 773 | 126 |
3 | Mary Dave | 9043 | James Biggs | Senior | 031 1241 | BA | British Airways | United Kingdom | BA3421 | 2021-09-26 | Singapore | Paris | 6765 | 734 |
4 | Oscar Halloran | 9043 | James Biggs | Senior | 031 1241 | BA | British Airways | United Kingdom | BA9123 | 2021-02-14 | London | Madrid | 1362 | 23 |
5 | Patricia Markle | 9043 | James Biggs | Senior | 031 1241 | BA | British Airways | United Kingdom | BA9878 | 2021-11-22 | Sydney | London | 17016 | 17 |
6 | Peter Clark | 4567 | Holly May | Senior | 012 9182 | BA | British Airways | United Kingdom | BA6753 | 2021-08-08 | London | Brussels | 209 | 98 |
7 | Ross Bridge | 1906 | John Garner | Junior | 024 9315 | BA | British Airways | United Kingdom | BA8177 | 2021-06-24 | London | Naples | 5995 | 888 |
grouped_pivot = df1.pivot(index='Origin',columns='Pilot Name',values='Mileage') #Creating Pivot Table
grouped_pivot
Pilot Name | David Roger | Geri Lawton | Ian Stoke | Mary Dave | Oscar Halloran | Patricia Markle | Peter Clark | Ross Bridge |
---|---|---|---|---|---|---|---|---|
Origin | ||||||||
London | NaN | 6593.0 | 773.0 | NaN | 1362.0 | NaN | 209.0 | 5995.0 |
New York | 3451.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
Singapore | NaN | NaN | NaN | 6765.0 | NaN | NaN | NaN | NaN |
Sydney | NaN | NaN | NaN | NaN | NaN | 17016.0 | NaN | NaN |
grouped_pivot.plot(kind='barh',figsize=(10,5))
plt.title('Origin of Pilots and Mileage Covered',fontsize = 20)
plt.ylabel('Origin City Name',fontsize = 15)
plt.xlabel('Mileage',fontsize = 15)
plt.legend(fontsize = 10)
plt.show()
grouped_pivot2 = df1.pivot(index='Destination',columns='Pilot Name',values='Mileage') #Creating Pivot Table
grouped_pivot2
Pilot Name | David Roger | Geri Lawton | Ian Stoke | Mary Dave | Oscar Halloran | Patricia Markle | Peter Clark | Ross Bridge |
---|---|---|---|---|---|---|---|---|
Destination | ||||||||
Brussels | NaN | NaN | NaN | NaN | NaN | NaN | 209.0 | NaN |
London | NaN | 6593.0 | NaN | NaN | NaN | 17016.0 | NaN | NaN |
Madrid | NaN | NaN | NaN | NaN | 1362.0 | NaN | NaN | NaN |
Naples | 3451.0 | NaN | NaN | NaN | NaN | NaN | NaN | 5995.0 |
Paris | NaN | NaN | 773.0 | 6765.0 | NaN | NaN | NaN | NaN |
grouped_pivot2.plot(kind='barh',figsize=(10,5))
plt.title('Destination of Pilots and Mileage Covered',fontsize = 20)
plt.ylabel('Destination City Name',fontsize = 15)
plt.xlabel('Mileage',fontsize = 15)
plt.legend(fontsize = 10)
plt.show()
df2 = df.groupby(['Attendant Name'],as_index=False).max() #Using groupby function to create required pivot table
df2
Attendant Name | Attendant ID | Rank | Contact Number | Carrier Code | Carrier Name | Country Carrier | Flight Number | Flight Date | Origin | Destination | Mileage | Pilot ID | Pilot Name | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Daniel Brown | 4567 | Senior | 012 5782 | BA | British Airways | United Kingdom | BA9878 | 2021-10-25 | London | Paris | 1311 | 672 | Peter Clark |
1 | Emma Austen | 1906 | Junior | 013 8261 | BA | British Airways | United Kingdom | BA9123 | 2021-12-30 | Sydney | Naples | 17016 | 888 | Ross Bridge |
2 | Holly May | 213 | Junior | 012 9182 | BA | British Airways | United Kingdom | BA6753 | 2021-09-26 | London | Paris | 216 | 734 | Peter Clark |
3 | James Biggs | 9043 | Senior | 031 1241 | BA | British Airways | United Kingdom | BA9878 | 2021-10-10 | Singapore | Naples | 6765 | 734 | Patricia Markle |
4 | John Garner | 781 | Junior | 024 9315 | BA | British Airways | United Kingdom | BA5656 | 2021-08-10 | Kuala Lumpur | London | 6593 | 888 | Ross Bridge |
grouped_pivot3 = df2.pivot(index='Origin',columns='Attendant Name',values='Mileage') #Creating Pivot Table
grouped_pivot3
Attendant Name | Daniel Brown | Emma Austen | Holly May | James Biggs | John Garner |
---|---|---|---|---|---|
Origin | |||||
Kuala Lumpur | NaN | NaN | NaN | NaN | 6593.0 |
London | 1311.0 | NaN | 216.0 | NaN | NaN |
Singapore | NaN | NaN | NaN | 6765.0 | NaN |
Sydney | NaN | 17016.0 | NaN | NaN | NaN |
grouped_pivot3.plot(kind='barh',figsize=(10,5))
plt.title('Origin of Flight Attendants and Mileage Covered',fontsize = 20)
plt.ylabel('Origin City Name',fontsize = 15)
plt.xlabel('Mileage',fontsize = 15)
plt.legend(fontsize = 10)
plt.show()
grouped_pivot4 = df2.pivot(index='Destination',columns='Attendant Name',values='Mileage') #Creating Pivot Table
grouped_pivot4
Attendant Name | Daniel Brown | Emma Austen | Holly May | James Biggs | John Garner |
---|---|---|---|---|---|
Destination | |||||
London | NaN | NaN | NaN | NaN | 6593.0 |
Naples | NaN | 17016.0 | NaN | 6765.0 | NaN |
Paris | 1311.0 | NaN | 216.0 | NaN | NaN |
grouped_pivot4.plot(kind='barh',figsize=(10,5))
plt.title('Destination of Flight Attendant and Mileage Covered',fontsize = 20)
plt.ylabel('Destination City Name',fontsize = 15)
plt.xlabel('Mileage',fontsize = 15)
plt.legend(fontsize = 10)
plt.show()